from time import sleep

import pandas as pd
import requests
from lxml import etree

# HTTP headers sent with every request made by req_html(). The User-Agent is a
# desktop browser string (Edge/Chrome on Windows); presumably this is so the
# site serves the regular page rather than rejecting the default client
# identifier -- confirm against the target site's behaviour.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Edg/116.0.1938.69'
}


def main():
    """Scrape every listing page and store the combined rows as CSV.

    Pages are fetched one at a time, with a one-second pause after each
    page. After every page the rows collected so far are printed, and once
    all pages are processed the full list is handed to ``save_info``.
    """
    page_urls = join_url()
    collected = []
    for page_url in page_urls:
        page_markup = req_html(page_url)
        collected.extend(parse_html(page_markup))
        print(collected)
        # Pause between requests so pages are not fetched back-to-back.
        sleep(1)
    save_info(collected)


def join_url(start=61, stop=81):
    """Build the list of listing-page URLs to scrape.

    Args:
        start: First page number to include. Defaults to 61.
        stop: Page number to stop before (exclusive). Defaults to 81.

    Returns:
        A list with one URL per page number in ``range(start, stop)``;
        empty when ``start >= stop``.
    """
    return [
        f'https://gz.fang.lianjia.com/loupan/nhs1pg{page}/'
        for page in range(start, stop)
    ]


def req_html(url, timeout=10):
    """Download a page and return its body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.
            Defaults to 10.

    Returns:
        The response body as a ``str`` (``response.text``).

    Raises:
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
    """
    # requests applies no timeout unless one is given, so a stalled
    # connection would otherwise block the whole scrape indefinitely.
    response = requests.get(url, headers=header, timeout=timeout)
    return response.text


def parse_html(html):
    """Extract one row per listing from a listing page.

    Args:
        html: Page markup as a string.

    Returns:
        A list of rows, each of the form ``[name, type_status, address,
        house_type, area, tag, main_price, total_price]``. ``name`` is the
        first text node of the name link; every other field is the matching
        text nodes joined with ``'|'`` (an empty string when nothing
        matches). Matched nodes without a name link are skipped, so the
        list may be empty.
    """
    tree = etree.HTML(html)
    # NOTE(review): etree.HTML appears to yield None when the parser builds
    # no document (e.g. blank input) -- confirm for the installed lxml.
    if tree is None:
        return []

    info_list = []
    for card in tree.xpath('/html/body/div[3]/ul[2]//li/div'):
        name = card.xpath('./div[@class="resblock-name"]/a/text()')
        if not name:
            # The XPath matches every <div> directly under an <li>; one
            # without a name link cannot be indexed with name[0] and is
            # skipped instead of raising IndexError and aborting the run.
            continue
        type_status = card.xpath('./div[@class="resblock-name"]/span/text()')
        address = card.xpath('./div[@class="resblock-location"]/a/text()|./div[@class="resblock-location"]/span/text()')
        house_type = card.xpath('./a[@class="resblock-room"]/span/text()')
        area = card.xpath('./div[@class="resblock-area"]/span/text()')
        tag = card.xpath('./div[@class="resblock-tag"]/span/text()')
        main_price = card.xpath('./div[@class="resblock-price"]/div[@class="main-price"]/span/text()')
        total_price = card.xpath('./div[@class="resblock-price"]/div[@class="second"]/text()')
        info_list.append(
            [name[0], '|'.join(type_status), '|'.join(address), '|'.join(house_type), '|'.join(area), '|'.join(tag),
             '|'.join(main_price), '|'.join(total_price)])
    return info_list


def save_info(info_list, path='../static/data/info_4.csv'):
    """Write the scraped rows to a CSV file.

    Args:
        info_list: Rows of eight values each, in the column order ``name,
            type_status, address, house_type, area, tag, main_price,
            total_price``.
        path: Destination CSV file. Defaults to
            ``'../static/data/info_4.csv'``; a relative path is resolved
            against the current working directory.
    """
    # Local import keeps this function usable without touching the
    # module-level import block.
    from pathlib import Path

    columns = ['name', 'type_status', 'address', 'house_type', 'area', 'tag', 'main_price',
               'total_price']
    df = pd.DataFrame(info_list, columns=columns)

    target = Path(path)
    # Create the output directory up front: otherwise a missing folder makes
    # to_csv fail only after all pages have already been scraped.
    target.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(target, index=False)


# Run the scrape only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
